home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Atari Mega Archive 2
/
Atari Mega Archive CD - Volume 2.iso
/
minix
/
up1510b.tgz
/
up1510b
/
src
/
commands
/
compress.c
< prev
next >
Wrap
C/C++ Source or Header
|
1990-07-23
|
38KB
|
1,604 lines
/* compress - Reduce file size using Modified Lempel-Ziv encoding */
/*
* compress.c - File compression ala IEEE Computer, June 1984.
*
* Authors: Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
* Jim McKie (decvax!mcvax!jim)
* Steve Davies (decvax!vax135!petsd!peora!srd)
* Ken Turkowski (decvax!decwrl!turtlevax!ken)
* James A. Woods (decvax!ihnp4!ames!jaw)
* Joe Orost (decvax!vax135!petsd!joe)
*
* Richard Todd Port to MINIX
* Andy Tanenbaum Cleanup
*
*
* Algorithm from "A Technique for High Performance Data Compression",
* Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
*
* Usage: compress [-dfvc] [-b bits] [file ...]
* Inputs:
* -d: If given, decompression is done instead.
*
* -c: Write output on stdout.
*
* -b: Parameter limits the max number of bits/code.
*
* -f: Forces output file to be generated, even if one already
* exists, and even if no space is saved by compressing.
* If -f is not used, the user will be prompted if stdin is
* a tty, otherwise, the output file will not be overwritten.
*
* -v: Write compression statistics
*
* file ...: Files to be compressed. If none specified, stdin
* is used.
* Outputs:
* file.Z: Compressed form of file with same mode, owner, and utimes
* or stdout (if stdin used as input)
*
* Assumptions:
* When filenames are given, replaces with the compressed version
* (.Z suffix) only if the file decreases in size.
* Algorithm:
* Modified Lempel-Ziv method (LZW). Basically finds common
* substrings and replaces them with a variable size code. This is
* deterministic, and can be done on the fly. Thus, the decompression
* procedure needs no input table, but tracks the way the table was built.
*/
#define AZTEC86 1
#define min(a,b) ((a>b) ? b : a)
/*
* Set USERMEM to the maximum amount of physical user memory available
* in bytes. USERMEM is used to determine the maximum BITS that can be used
* for compression.
*
* SACREDMEM is the amount of physical memory saved for others; compress
* will hog the rest.
*/
#ifndef SACREDMEM
#define SACREDMEM 0
#endif
#ifndef USERMEM
# define USERMEM 450000 /* default user memory */
#endif
#define REGISTER register
#define DOTZ ".Z"
#ifdef AZTEC86
void prratio(),cl_block(),cl_hash(),output(),decompress(),
copystat(),writeerr(),compress(),Usage(),version();
#ifdef AZTECBITS
# define BITS AZTECBITS
#else
# define BITS 13
#endif
# undef USERMEM
#endif /* AZTEC86 */
#ifdef USERMEM
# if USERMEM >= (433484+SACREDMEM)
# define PBITS 16
# else
# if USERMEM >= (229600+SACREDMEM)
# define PBITS 15
# else
# if USERMEM >= (127536+SACREDMEM)
# define PBITS 14
# else
# if USERMEM >= (73464+SACREDMEM)
# define PBITS 13
# else
# define PBITS 12
# endif
# endif
# endif
# endif
# undef USERMEM
#endif /* USERMEM */
#ifdef PBITS /* Preferred BITS for this memory size */
# ifndef BITS
# define BITS PBITS
# endif
#endif /* PBITS */
#if BITS == 16
# define HSIZE 69001 /* 95% occupancy */
#endif
#if BITS == 15
# define HSIZE 35023 /* 94% occupancy */
#endif
#if BITS == 14
# define HSIZE 18013 /* 91% occupancy */
#endif
#if BITS == 13
# define HSIZE 9001 /* 91% occupancy */
#endif
#if BITS <= 12
# define HSIZE 5003 /* 80% occupancy */
#endif
/*
* a code_int must be able to hold 2**BITS values of type int, and also -1
*/
#if BITS > 15
typedef long int code_int;
#else
typedef int code_int;
#endif
#ifdef SIGNED_COMPARE_SLOW
typedef unsigned long int count_int;
typedef unsigned short int count_short;
#else
typedef long int count_int;
#endif
#ifdef NO_UCHAR
typedef char char_type;
#else
typedef unsigned char char_type;
#endif /* UCHAR */
char_type magic_header[] = { "\037\235" }; /* 1F 9D */
/* Defines for third byte of header */
#define BIT_MASK 0x1f
#define BLOCK_MASK 0x80
/* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is
a fourth header byte (for expansion).
*/
#define INIT_BITS 9 /* initial number of bits/code */
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <ctype.h>
#include <signal.h>
#include <stdio.h>
#define ARGVAL() (*++(*argv) || (--argc && *++argv))
int n_bits; /* number of bits/code */
int maxbits = BITS; /* user settable max # bits/code */
code_int maxcode; /* maximum code, given n_bits */
code_int maxmaxcode = 1 << BITS; /* should NEVER generate this code */
#ifdef COMPATIBLE /* But wrong! */
# define MAXCODE(n_bits) (1 << (n_bits) - 1)
#else
# define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
#endif /* COMPATIBLE */
#ifndef AZTEC86
count_int htab [HSIZE];
unsigned short codetab [HSIZE];
#else
count_int *htab;
unsigned short *codetab;
# define HTABSIZE ((unsigned)(HSIZE*sizeof(count_int)))
# define CODETABSIZE ((unsigned)(HSIZE*sizeof(unsigned short)))
#define htabof(i) htab[i]
#define codetabof(i) codetab[i]
#endif /* XENIX_16 */
code_int hsize = HSIZE; /* for dynamic table sizing */
count_int fsize;
/*
* To save much memory, we overlay the table used by compress() with those
* used by decompress(). The tab_prefix table is the same size and type
* as the codetab. The tab_suffix table needs 2**BITS characters. We
* get this from the beginning of htab. The output stack uses the rest
* of htab, and contains characters. There is plenty of room for any
* possible stack (stack used to be 8000 characters).
*/
#define tab_prefixof(i) codetabof(i)
#ifdef XENIX_16
# define tab_suffixof(i) ((char_type *)htab[(i)>>15])[(i) & 0x7fff]
# define de_stack ((char_type *)(htab2))
#else /* Normal machine */
# define tab_suffixof(i) ((char_type *)(htab))[i]
# define de_stack ((char_type *)&tab_suffixof(1<<BITS))
#endif /* XENIX_16 */
code_int free_ent = 0; /* first unused entry */
int exit_stat = 0;
code_int getcode();
void Usage() {
#ifdef DEBUG
fprintf(stderr,"Usage: compress [-dDVfc] [-b maxbits] [file ...]\n");
}
int debug = 0;
#else
fprintf(stderr,"Usage: compress [-dfvcV] [-b maxbits] [file ...]\n");
}
#endif /* DEBUG */
int nomagic = 0; /* Use a 3-byte magic number header, unless old file */
int zcat_flg = 0; /* Write output on stdout, suppress messages */
int quiet = 0; /* don't tell me about compression */
/*
* block compression parameters -- after all codes are used up,
* and compression rate changes, start over.
*/
int block_compress = BLOCK_MASK;
int clear_flg = 0;
long int ratio = 0;
#define CHECK_GAP 10000 /* ratio check interval */
count_int checkpoint = CHECK_GAP;
/*
* the next two codes should not be changed lightly, as they must not
* lie within the contiguous general code space.
*/
#define FIRST 257 /* first free entry */
#define CLEAR 256 /* table clear output code */
int force = 0;
char ofname [100];
#ifdef DEBUG
int verbose = 0;
#endif /* DEBUG */
#ifndef METAWARE
#ifdef AZTEC86
void
#else
int
#endif
(*bgnd_flag)();
#endif
int do_decomp = 0;
void main( argc, argv )
REGISTER int argc; char **argv;
{
int overwrite = 0; /* Do not overwrite unless given -f flag */
char tempname[100];
char **filelist, **fileptr;
char *cp;extern char *strrchr(), *malloc();
struct stat statbuf;
#ifndef METAWARE
extern void onintr(), oops();
if ( (bgnd_flag = signal ( SIGINT, SIG_IGN )) != SIG_IGN ) {
signal ( SIGINT, onintr );
signal ( SIGSEGV, oops );
}
#endif
#ifdef AZTEC86
#ifdef METAWARE
_setmode(NULL,_ALL_FILES_BINARY);
_setmode(stdin,_BINARY);
_setmode(stdout,_BINARY);
_setmode(stderr,_TEXT);
#endif
if (NULL == (htab = (count_int *)malloc(HTABSIZE)))
{
fprintf(stderr,"Can't allocate htab\n");
exit(1);
}
if (NULL == (codetab = (unsigned short *)malloc(CODETABSIZE)))
{
fprintf(stderr,"Can't allocate codetab\n");
exit(1);
}
#endif
#ifdef COMPATIBLE
nomagic = 1; /* Original didn't have a magic number */
#endif /* COMPATIBLE */
filelist = fileptr = (char **)(malloc((unsigned)(argc * sizeof(*argv))));
*filelist = NULL;
if((cp = strrchr(argv[0], '/')) != 0) {
cp++;
} else {
cp = argv[0];
}
if(strcmp(cp, "uncompress") == 0) {
do_decomp = 1;
} else if(strcmp(cp, "zcat") == 0) {
do_decomp =